We need the following packages:
required_packages <- c("readxl",
"purrr",
"dplyr",
"stringr",
"tidyr",
"magrittr",
"AMR")
Installing those that are not already installed on the system:
for (package in required_packages) {
if (!package %in% installed.packages()) install.packages(package)
}
Loading the packages:
devnull <- lapply(required_packages, require, character.only = TRUE)
Date and type of antimicrobial use per farm:
amu <- read_excel("AMU_KietStudy_Marc.xlsx")
Antimicrobial class of each antimicrobial:
classes <- read_excel("FarmvsClass.xlsx")
The names of the farms:
(farms <- paste0("K", c(formatC(6:9, 1, flag = "0"), 11:26)))
## [1] "K06" "K07" "K08" "K09" "K11" "K12" "K13" "K14" "K15" "K16" "K17" "K18"
## [13] "K19" "K20" "K21" "K22" "K23" "K24" "K25" "K26"
Let’s start by looking at the resistance genes concentrations in chicken only (maybe we’ll look at the rat data later on):
genes <- farms %>%
map(read_excel, path = "KietAnalysisData.xlsx") %>%
map(filter, Group.1 %in% c("Chicken-Control", "Chicken-Treatment")) %>%
map(select, -Group.1, -TotalLog2Value) %>%
setNames(farms)
Let’s check that the names of the columns are the same for all the farms:
tmp <- genes %>%
map_df(names) %>%
apply(1, unique)
tmp[map_int(tmp, length) > 1]
## [[1]]
## [1] "FarmID" "K09"
There is a problem with with the FarmID variable that is
called K09 is one farm. Let’s fix this. The names of the
variables seem OK in the first farm:
(correct_names <- names(genes[[1]]))
## [1] "FarmID" "Group" "Sample_Name" "SamplingDay" "aac3-liacde"
## [6] "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla" "aadA"
## [11] "aadE" "aadE-like" "acrA" "acrB" "acrF"
## [16] "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll" "arnA"
## [21] "bacA_1" "bacA_2" "blaAMPC" "blaCMY2" "blaCTXM"
## [26] "blaDHA" "blaPSE" "blaSHV" "blaTEM" "blaZ"
## [31] "cat" "cblA" "cepA" "cepA2" "cfr"
## [36] "cfr2" "cfxA" "cmlA1-01" "cmr" "dfrA"
## [41] "dfrF" "ermA" "ermB" "ermC" "floR"
## [46] "folA" "fosB" "fox5" "macB" "mcr-1"
## [51] "mcr-2" "mcr-3" "mdtF" "mdtL" "mdtO"
## [56] "mecA" "mefa10" "mefA3" "mfsA" "oprD"
## [61] "oqxA" "oqxB" "qacA" "qacC" "qacE"
## [66] "qnrA" "qnrB" "qnrS" "sat4" "spc"
## [71] "strB" "sul1" "sul2" "sul3" "tetB"
## [76] "tetC-01" "tetM" "tetO" "tetQ" "tetW"
## [81] "tolC" "vanA" "vanB" "vatA"
Let’s use them as variables names for all the farms:
genes %<>% map(setNames, correct_names)
Note also that not all farms have a “Before” measurement in the control group:
tmp <- genes %>%
map(~ .x %>% filter(SamplingDay == "Before") %>% select(Group)) %>%
.[map_int(., nrow) < 2] %>%
unlist()
tmp
## K14.Group K15.Group K16.Group K17.Group K18.Group K20.Group
## "Treatment" "Treatment" "Treatment" "Treatment" "Treatment" "Treatment"
## K21.Group K22.Group K23.Group
## "Treatment" "Treatment" "Treatment"
For the farms where the “Before” measurement is missing in the
control group, let’s just use the “Before” measurement of the treatment
group. For that, we need the following function where x is
a dataframe for one farm:
control_before <- function(x) {
y <- filter(x, SamplingDay == "Before")
y$Group <- "Control"
y$Sample_Name <- NA
rbind(x, y)
}
Let’s now we can use this function on all the farms that needed to be fixed:
farms_to_fix <- tmp %>%
names() %>%
str_remove(".Group")
fixed_farms <- genes %>%
magrittr::extract(farms_to_fix) %>%
map(control_before)
genes[farms_to_fix] <- fixed_farms
Names of resistance genes:
(resistance_genes <- setdiff(names(genes[[1]]),
c("FarmID", "Group", "Sample_Name", "SamplingDay")))
## [1] "aac3-liacde" "aac6-aph2" "aac6-lb" "aac6-li" "aac6-lla"
## [6] "aadA" "aadE" "aadE-like" "acrA" "acrB"
## [11] "acrF" "aph2-lb" "aph2-lde" "aph3-lac" "aph3-lll"
## [16] "arnA" "bacA_1" "bacA_2" "blaAMPC" "blaCMY2"
## [21] "blaCTXM" "blaDHA" "blaPSE" "blaSHV" "blaTEM"
## [26] "blaZ" "cat" "cblA" "cepA" "cepA2"
## [31] "cfr" "cfr2" "cfxA" "cmlA1-01" "cmr"
## [36] "dfrA" "dfrF" "ermA" "ermB" "ermC"
## [41] "floR" "folA" "fosB" "fox5" "macB"
## [46] "mcr-1" "mcr-2" "mcr-3" "mdtF" "mdtL"
## [51] "mdtO" "mecA" "mefa10" "mefA3" "mfsA"
## [56] "oprD" "oqxA" "oqxB" "qacA" "qacC"
## [61] "qacE" "qnrA" "qnrB" "qnrS" "sat4"
## [66] "spc" "strB" "sul1" "sul2" "sul3"
## [71] "tetB" "tetC-01" "tetM" "tetO" "tetQ"
## [76] "tetW" "tolC" "vanA" "vanB" "vatA"
Here we compute aggregates of resistance genes, these aggregates
being the sum of all the resistance genes but also the sum of all the
resistance genes by class of antimicrobial against which they are
effective. The correspondence between resistance genes and antimicrobial
resistance is in the classes data frame:
classes
## # A tibble: 81 × 2
## EvaGreen_Name Antimicrobial_Class
## <chr> <chr>
## 1 aac3-liacde Aminoglycoside
## 2 aac6-aph2 Aminoglycoside
## 3 aac6-lb Aminoglycoside
## 4 aac6-li Aminoglycoside
## 5 aac6-lla Aminoglycoside
## 6 aadA Aminoglycoside
## 7 aadE Aminoglycoside
## 8 aadE-like Aminoglycoside
## 9 acrA Multidrug
## 10 acrB Multidrug
## # … with 71 more rows
Let’s add a category All that corresponds to all the
resistance genes:
classes %<>%
bind_rows(data.frame(EvaGreen_Name = resistance_genes,
Antimicrobial_Class = "All"))
The antimicrobial classes to which each resistance gene corresponds are now:
(classes_names <- unique(classes$Antimicrobial_Class))
## [1] "Aminoglycoside" "Multidrug" "Polymyxin" "Other"
## [5] "Beta-Lactam" "Phenicol" "Sulfonamide" "MLSB"
## [9] "Quinolone" "Tetracycline" "Glycopeptide" "All"
The following function calculates the sum of the genes concentration
for a given class am_class for the data frame of a given
farm farm:
sum_by_class <- function(am_class, farm) {
classes %>%
filter(Antimicrobial_Class == am_class) %>%
pull(EvaGreen_Name) %>%
extract(farm, .) %>%
rowSums()
}
The following function uses the one above and adds as many variables as antimicrobial classes to the data frame of a given farm:
add_sums_by_class <- function(farm) {
farm %>%
map_dfc(classes_names, sum_by_class, .) %>%
setNames(classes_names) %>%
bind_cols(farm, .)
}
Let’s add the sums of concentrations to all the farms:
genes %<>% map(add_sums_by_class)
The classes of the antimicrobials used:
To work out the classes of antimicrobials used we first need to
define a hash table that allows to match classes names as found by the
AMR package and the classes names recorded in our
amu data:
# AMR package: amu dataset:
hash <- c("Aminoglycosides" = "Aminoglycoside",
"Amphenicols" = "Phenicol",
"Antifungals/antimycotics" = "Antifungals/antimycotics",
"Beta-lactams/penicillins" = "Beta-Lactam",
"Cephalosporins (3rd gen.)" = "Beta-Lactam",
"Macrolides/lincosamides" = "MLSB",
"Other antibacterials" = "Other",
"Polymyxins" = "Polymyxin",
"Quinolones" = "Quinolone",
"Tetracyclines" = "Tetracycline",
"Trimethoprims" = "Sulfonamide")
Let’s generate the correspondence data frame:
amu_classes <- amu %>%
select(AAI) %>%
unique() %>%
mutate(group = map_chr(AAI, ab_group),
class = hash[group])
## Warning: in `as.ab()`: these values could not be coerced to a valid antimicrobial
## ID: "bromhexine".
amu_classes
## # A tibble: 25 × 3
## AAI group class
## <chr> <chr> <chr>
## 1 sulfadimidine Trimethoprims Sulfonamide
## 2 sulfaquinoxaline Other antibacterials Other
## 3 doxycyline Tetracyclines Tetracycline
## 4 tylosin Macrolides/lincosamides MLSB
## 5 oxytetracyline Tetracyclines Tetracycline
## 6 colistin Polymyxins Polymyxin
## 7 ampicillin Beta-lactams/penicillins Beta-Lactam
## 8 flumequine Quinolones Quinolone
## 9 enrofloxacin Quinolones Quinolone
## 10 ceftiofur Cephalosporins (3rd gen.) Beta-Lactam
## # … with 15 more rows
Let’s check for missing values:
amu_classes %>%
filter(is.na(AAI) | is.na(group) | is.na(class))
## # A tibble: 1 × 3
## AAI group class
## <chr> <chr> <chr>
## 1 bromhexine <NA> <NA>
Let’s fix it manually:
amu_classes[amu_classes$AAI == "bromhexine", "class"] <- "Mucoactive agent"
Finally, we can use amu_classes to make another hash
table:
hash <- with(amu_classes, setNames(class, AAI))
And we can use this new hash table to add the antimicrobial class to
the amu dataframe:
(amu %<>% mutate(class = hash[AAI]))
## # A tibble: 90 × 6
## FarmID AgeUse_Day DurationOfUse_days ProductNo AAI class
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 K06 30 4 1 sulfadimidine Sulfonamide
## 2 K06 30 4 1 sulfaquinoxaline Other
## 3 K06 35 3 2 doxycyline Tetracycline
## 4 K06 35 3 2 tylosin MLSB
## 5 K07 1 4 1 oxytetracyline Tetracycline
## 6 K07 1 4 1 colistin Polymyxin
## 7 K07 36 1 2 ampicillin Beta-Lactam
## 8 K07 36 1 2 colistin Polymyxin
## 9 K07 38 3 3 flumequine Quinolone
## 10 K07 38 3 4 enrofloxacin Quinolone
## # … with 80 more rows
The time points:
genes %>%
map(pull, SamplingDay) %>%
unlist() %>%
unique()
## [1] "Before" "7" "14" "60" "End" "After" "30" "90"
Generating new time points:
genes %<>% map(mutate,
SamplingDay2 = as.integer(recode(SamplingDay,
Before = "-7",
After = "0",
End = "120"))) %>%
map(arrange, Group, SamplingDay2) # making sure that data are arranged chronologically
Some customized lines() and points()
function:
lines2 <- function(...) lines(..., lwd = 2)
A utility function for the function
plot_gene_concentration() that follows after. This function
adds the dots and lines to a plot:
plot_data <- function(x, gene, col, lwd = 2, lty = 3) {
lines2 <- function(...) lines(..., col = col, lwd = lwd)
nrows <- nrow(x)
x2 <- x[-c(1, nrows), ]
x3 <- x[1:2, ]
x4 <- x[(nrows - 1):nrows, ]
points(x$SamplingDay2, x[[gene]], col = col, lwd = lwd)
lines2(x2$SamplingDay2, x2[[gene]])
lines2(x3$SamplingDay2, x3[[gene]], lty = lty)
lines2(x4$SamplingDay2, x4[[gene]], lty = lty)
}
The following function plots a given gene concentration as a function of time for control and treatment:
plot_gene_concentration <- function(farm, gene, text) {
farm_dataset <- genes[[farm]]
plot(farm_dataset$SamplingDay2, farm_dataset[[gene]], type = "n",
xlim = c(-10, 120), xlab = "time after treatment (days)",
ylab = "gene concentration", axes = FALSE)
ats <- c(-7, seq(0, 120, 20))
lbs <- ats
lbs[1] <- "before"
lbs[length(lbs)] <- "end"
axis(1, ats, lbs)
axis(2)
farm_dataset %>%
filter(Group == "Control") %>%
plot_data(gene, "blue")
farm_dataset %>%
filter(Group == "Treatment") %>%
plot_data(gene, "red")
mtext(text)
abline(v = 0)
}
Plotting aac3-liacde for control and treatment in farm
K06
plot_gene_concentration(farms[1], resistance_genes[1], resistance_genes[1])
Number of columns we want and some graph tuning:
ncols <- 3
plt_val <- c(.13, .92, .22, .9)
Plotting all the genes for the first farm:
opar <- par(mfrow = c(ceiling(length(resistance_genes) / ncols), ncols), plt = plt_val)
walk(resistance_genes, ~ plot_gene_concentration(farms[1], .x, .x))
par(opar)
Plotting all the farms for the first gene:
opar <- par(mfrow = c(ceiling(length(farms) / ncols), ncols), plt = plt_val)
walk(farms, ~ plot_gene_concentration(.x, resistance_genes[1], .x))
par(opar)
Let’s now try to standardize the gene concentrations by the gene concentration before treatment. This function standardizes one experiment:
standardize_by_before_ <- function(x) {
rbind(rep(1, length(resistance_genes)),
sweep(as.matrix(x[x$SamplingDay != "Before", resistance_genes]), 2,
unlist(x[x$SamplingDay == "Before", resistance_genes]), `/`))
}
This function standardizes the control and the treatment experiments of a farm:
standardize_by_before <- function(x) {
x[, resistance_genes] <- rbind(standardize_by_before_(filter(x, Group == "Control")),
standardize_by_before_(filter(x, Group == "Treatment")))
x
}
Let’s standardize the gene concentrations for all the farms:
genes_standardized <- map(genes, standardize_by_before)
Let’s now plot all the farms on a single plot for a given antimicrobial. Let’s first define the following function that draw the layout the plot:
plot_frame <- function(...) {
plot(..., type = "n", xlab = "time (days)", ylab = "standardized genes concentrations")
}
Now, we can start exploring various option of plotting the data, starting with this function:
plot_gene_all_farms <- function(gene, infinity = -1000) {
transformation <- function(x) {
x[, 3] <- log10(x[, 3])
x
}
tmp <- genes_standardized %>%
map(extract, c("Group", "SamplingDay2", gene)) %>%
map(transformation)
tmp2 <- bind_rows(tmp)
plot_frame(tmp2[[2]], tmp2[[3]])
tmp %>%
map(function(x) {x[[3]] <- replace(x[[3]], is.infinite(x[[3]]), infinity); x}) %>%
map(~ split(.x, .x$Group)) %>%
unlist(recursive = FALSE) %>%
map(mutate, col = c("blue", "red")[(Group == "Treatment") + 1]) %>%
sample() %>%
# walk(~ lines2(.x[[2]], .x[[3]], col = .x$col))
walk(~ plot_data(.x, gene, col = .x$col))
mtext(gene)
abline(h = 0, lwd = 2)
legend("bottomright", legend = c("treatment", "control"), col = c("red", "blue"),
lty = 1, lwd = 2, bty = "n")
}
Let’s try it on one antimicrobial:
plot_gene_all_farms(resistance_genes[1], -1000)
Let’s try an alternative visualization, using this following function for box-plots:
boxplot2 <- function(x, eps, col, ...) {
boxplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE, axes = FALSE,
boxwex = 2.5, col = adjustcolor(col, .5), outline = FALSE, ...)
}
Here is the alternative visualization:
plot_gene_all_farms_boxplot <- function(antimicrobial) {
eps <- 1.5
tmp <- genes_standardized %>%
map(filter, SamplingDay != "Before") %>%
map(extract, c("Group", "SamplingDay2", antimicrobial))
tmp2 <- bind_rows(tmp)
plot_frame(tmp2[[2]], tmp2[[3]])
control <- map(tmp, filter, Group == "Control")
walk(control, ~ points(jitter(.x[[2]] - 2), .x[[3]], col = "blue"))
control %>%
bind_rows() %>%
boxplot2(-eps, col = "blue")
treatment <- map(tmp, filter, Group == "Treatment")
walk(treatment, ~ points(jitter(.x[[2]] + 2), .x[[3]], col = "red"))
treatment %>%
bind_rows() %>%
boxplot2(eps, col = "red")
mtext(antimicrobial)
legend("topright", legend = c("treatment", "control"), fill = c("red", "blue"), bty = "n")
}
Let’s try it:
plot_gene_all_farms_boxplot(resistance_genes[1])
Mmmm… For some reason, interquartiles ranges look a bit weird on some of these boxplots. Not sure how this is calculated but I don’t really like it.
Let’s try a violin plot instead, by using this function:
vioplot2 <- function(x, eps, color, ...) {
vioplot::vioplot(x[[3]] ~ x[[2]], at = unique(x[[2]]) + eps, add = TRUE,
axes = FALSE, fill = color, lineCol = color, border = color,
wex = 4, col = adjustcolor(color, .5), ...)
}
And the new plotting function:
plot_gene_all_farms_violin <- function(antimicrobial) {
eps <- 1.5
tmp <- genes_standardized %>%
map(filter, SamplingDay != "Before") %>%
map(extract, c("Group", "SamplingDay2", antimicrobial))
tmp2 <- bind_rows(tmp)
plot_frame(tmp2[[2]], tmp2[[3]])
control <- map(tmp, filter, Group == "Control")
walk(control, ~ points(jitter(.x[[2]] - 2), .x[[3]], col = "blue"))
control %>%
bind_rows() %>%
vioplot2(-eps, "blue")
treatment <- map(tmp, filter, Group == "Treatment")
walk(treatment, ~ points(jitter(.x[[2]] + 2), .x[[3]], col = "red"))
treatment %>%
bind_rows() %>%
vioplot2(eps, "red")
mtext(antimicrobial)
legend("topright", legend = c("treatment", "control"), fill = c("red", "blue"), bty = "n")
}
Let’s try it too:
plot_gene_all_farms_violin(resistance_genes[1])
Not sure I like it either…
Let’s plot paired treatment and control for each farm instead. For that, we need the following function:
plot_gene_all_farms_pairwise <- function(antimicrobial) {
col_val <- adjustcolor(c("blue", "red"), .5)
genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
bind_rows() %>%
mutate(SamplingDay3 = jitter(SamplingDay2),
color = (Treatment > Control) + 1) %>%
with({
plot_frame(rep(SamplingDay3, 2), c(Control, Treatment))
points(SamplingDay3, Control, col = "blue")
points(SamplingDay3, Treatment, col = "red")
arrows(SamplingDay3, Control, SamplingDay3, Treatment, 0, col = col_val[color])
})
mtext(antimicrobial)
}
Let’s try it:
plot_gene_all_farms_pairwise(resistance_genes[1])
Let’s plot the difference between treatment and control for each farm. For that, we need the following function:
plot_differences <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control)
tmp %>%
bind_rows() %$%
plot_frame(SamplingDay2, difference)
walk(tmp, ~ with(.x, lines2(SamplingDay2, difference, col = "green")))
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences(resistance_genes[1])
Let’s consider this function with boxplots:
plot_differences_boxplot <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows()
with(tmp, plot(jitter(SamplingDay2), difference, col = "green",
xlab = "time (days)",
ylab = "difference in standardized genes concentrations"))
tmp %>%
select(2, 1, 3) %>%
boxplot2(0, col = "green")
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences_boxplot(resistance_genes[1])
The boxplot still looks weird.
Let’s look at the violin alternative:
plot_differences_violin <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows()
with(tmp, plot(jitter(SamplingDay2), difference, col = "green",
xlab = "time (days)",
ylab = "difference in standardized genes concentrations"))
tmp %>%
select(2, 1, 3) %>%
vioplot2(0, col = "green")
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences_violin(resistance_genes[1])
Same comment.
Let’s consider another option.
plot_differences_quantiles <- function(antimicrobial) {
tmp <- genes %>%
map(extract, c("SamplingDay2", "Group", antimicrobial)) %>%
map(pivot_wider, names_from = Group, values_from = 3) %>%
map(mutate, difference = Treatment - Control) %>%
bind_rows()
with(tmp, plot(jitter(SamplingDay2), difference, col = "darkgrey",
xlab = "time (days)",
ylab = "difference in standardized genes concentrations"))
x_val <- sort(unique(tmp$SamplingDay2))
tmp %>%
select(SamplingDay2, difference) %>%
group_by(SamplingDay2) %>%
group_split() %>%
map(~ quantile(.x$difference, c(.25, .5, .75), na.rm = TRUE)) %>%
bind_rows() %>%
mutate(x_val = x_val) %>%
with({
points(x_val, `50%`, lwd = 2)
arrows(x_val, `25%`, x_val, `75%`, .1, 90, 3, lwd = 2, col = "black")
lines(x_val, `25%`, lty = 3)
lines(x_val, `50%`, lty = 2)
lines(x_val, `75%`, lty = 3)
})
abline(h = 0)
mtext(antimicrobial)
}
Let’s try it:
plot_differences_quantiles(resistance_genes[1])